import altair as alt
import pandas as pd
from altair import datum
hw7
Homework 7
Import Packages
# Lift Altair's row limit on embedded datasets so larger DataFrames can be
# passed straight to alt.Chart.
alt.data_transformers.disable_max_rows()
# Output: DataTransformerRegistry.enable('default')
Part 1
# Load the gasoline pump-price table: one row per country, one column per year.
gas_gap_data_url = "https://calvin-data304.netlify.app/data/pump_price_for_gasoline_us_per_liter.csv"
gas_gap_data = pd.read_csv(gas_gap_data_url)
gas_gap_data.head()
# Output:
# |   | country | 1991 | 1992 | 1993 | 1994 | 1995 | 1996 | 1997 | 1998 | 1999 | ... | 2007 | 2008 | 2009 | 2010 | 2011 | 2012 | 2013 | 2014 | 2015 | 2016 |
# |---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
# | 0 | Afghanistan | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | 1.05 | NaN | 1.15 | NaN | 1.28 | NaN | 1.07 | NaN | 0.7 |
# | 1 | Angola | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 0.38 | NaN | ... | NaN | 0.53 | NaN | 0.65 | NaN | 0.63 | NaN | 0.76 | NaN | 0.97 |
# | 2 | Albania | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 0.86 | NaN | ... | NaN | 1.36 | NaN | 1.46 | NaN | 1.81 | NaN | 1.76 | NaN | 1.36 |
# | 3 | Andorra | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | 1.24 | NaN | 1.49 | NaN | 1.67 | NaN | 1.51 | NaN | NaN |
# | 4 | UAE | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 0.23 | NaN | ... | NaN | 0.45 | NaN | 0.47 | NaN | 0.47 | NaN | 0.47 | NaN | 0.49 |
# 5 rows × 27 columns
We account for several label discrepancies between the datasets.
# Row label -> country name to write into the 'country' column, so the
# price table's names match the names used by the map features.
# NOTE(review): the keys are positional row labels of the CSV as loaded;
# they silently point at the wrong rows if the CSV's row order changes.
_country_label_fixes = {
    35: "Dem. Rep. Congo",
    172: "United States of America",
    28: "Central African Rep.",
    58: "United Kingdom",
    152: "S. Sudan",
    85: "Kyrgyzstan",
    19: "Bosnia and Herz.",
    33: "Côte d'Ivoire",
    4: "United Arab Emirates",
    46: "Dominican Rep.",
    108: "Macedonia",
}
for _row_label, _map_name in _country_label_fixes.items():
    gas_gap_data.at[_row_label, 'country'] = _map_name

# Country outlines (TopoJSON) used as the geographic layer of the map.
countries = alt.topo_feature(
    'https://cdn.jsdelivr.net/npm/world-atlas@2/countries-110m.json',
    feature='countries',
)
# Base world map: every country drawn as a grey shape with a black outline,
# using the Mercator projection.
country_map = (
    alt.Chart(countries)
    .mark_geoshape(fill='#aaaaaa', stroke='#000000')
    .project('mercator')
)
# Show the plain base map.
country_map.properties(width=600, height=400)

# Choropleth: attach each country's 2012 price to its map feature by matching
# the feature's `properties.name` against the table's `country` column, then
# colour the shape by that value.
country_map.transform_lookup(
    lookup='properties.name',
    from_=alt.LookupData(gas_gap_data, 'country', ['2012'])
).encode(
    fill="2012:Q",
    tooltip=["properties.name:O", "2012:Q"]
).properties(width=600, height=400, title="Gas rate: US $ per liter")
# Part 2
# Load the survey table used in Parts 2 and 3.
democracy_url = "https://calvin-data304.netlify.app/data/wvs.csv"
democracy_data = pd.read_csv(democracy_url)
# Wrangle the total number of respondents for each nation
# Count the rows per country and store the counts in a column named "total".
# Naming the column in reset_index() avoids depending on the default name of
# the value_counts() result, which is 0 on older pandas and "count" on
# pandas >= 2.0 (where renaming column 0 would silently do nothing).
respondants_per_country = (
    democracy_data.value_counts("country").reset_index(name="total")
)

# Bar chart of respondents per country, bars ordered by descending total.
base = alt.Chart(respondants_per_country).encode(
    alt.X(field='country', type="ordinal", sort="-y"),
    alt.Y(field='total', type="quantitative"),
).properties(width=300, height=300, title="Number of Respondants")
base.mark_bar()
# Part 3
def _age_group_boxplots(group_field, panel_height):
    """Build per-country box plots of `age`, split by an age-group column.

    Args:
        group_field: Name of the column in `democracy_data` that holds the
            age-group label (used for both the y axis and the colour).
        panel_height: Height in pixels of each country's panel.

    Returns:
        A faceted Altair chart with one panel per country, three panels per
        row, whose whiskers span the minimum to the maximum age.
    """
    return alt.Chart(democracy_data).mark_boxplot(extent="min-max").encode(
        alt.X(field="age", type="quantitative", title="Age in Years"),
        alt.Y(field=group_field, type="nominal"),
        alt.Color(f"{group_field}:N"),
    ).properties(
        width=300, height=panel_height
    ).facet(
        facet="country:O",
        columns=3,
    )


# Age distribution within each `age3` group, per country.
age3_facet = _age_group_boxplots("age3", 75)
age3_facet

# Same view for the `age6` grouping; taller panels than the age3 chart.
age6_facet = _age_group_boxplots("age6", 150)
age6_facet